ZhongXu has asked that I make him the HODs from the SHAMs I made him. This should be pretty straightforward so I'll do it quickly here.
In [1]:
import numpy as np
import astropy
from pearce.mocks import cat_dict
from pearce.mocks.assembias_models.table_utils import compute_prim_haloprop_bins
import h5py
In [2]:
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
from itertools import cycle
In [3]:
%%bash
ls /home/users/swmclau2/scratch/*mpeak*.hdf5 -ltr
In [4]:
from collections import Counter
def compute_occupations(halo_catalog, galaxy_catalog):
#halo_table = cat.halocat.halo_table[cat.halocat.halo_table['halo_mvir'] > min_ptcl*cat.pmass]
cens_occ = np.zeros((np.sum(halo_catalog['halo_upid'] == -1),))
#cens_occ = np.zeros((len(halo_table),))
sats_occ = np.zeros_like(cens_occ)
detected_central_ids = set(galaxy_catalog[galaxy_catalog['halo_upid']==-1]['halo_id'])
detected_satellite_upids = Counter(galaxy_catalog[galaxy_catalog['halo_upid']!=-1]['halo_upid'])
for idx, row in enumerate(halo_catalog[halo_catalog['halo_upid'] == -1]):
if idx%1000000 == 0:
print idx
cens_occ[idx] = 1.0 if row['halo_id'] in detected_central_ids else 0.0
sats_occ[idx]+= detected_satellite_upids[row['halo_id']]
return cens_occ, sats_occ
In [5]:
from math import ceil
def compute_mass_bins(prim_haloprop, dlog10_prim_haloprop=0.05):
lg10_min_prim_haloprop = np.log10(np.min(prim_haloprop))-0.001
lg10_max_prim_haloprop = np.log10(np.max(prim_haloprop))+0.001
num_prim_haloprop_bins = (lg10_max_prim_haloprop-lg10_min_prim_haloprop)/dlog10_prim_haloprop
return np.logspace(
lg10_min_prim_haloprop, lg10_max_prim_haloprop,
num=int(ceil(num_prim_haloprop_bins)))
In [6]:
from glob import glob
#hdf5_files = glob('../*catalog_ab_halo*fixed*.hdf5')
scratch_path = '/home/users/swmclau2/scratch/'
hdf5_files = ['catalog_ab_halo_vmax@mpeak.hdf5','catalog_ab_halo_mpeak.hdf5', 'catalog_ab_halo_mpeak_shuffled.hdf5']
for fname in hdf5_files:
f = h5py.File(scratch_path+ fname, "r")
print fname
print len(f.keys())
print '*'*50
In [ ]:
paths = ['halo_vmax@mpeak_catalog', 'halo_mpeak_catalog', 'halo_mpeak_shuffled']
In [ ]:
Lbox = 1000.0
#catalog = np.loadtxt('ab_sham_hod_data_cut.npy')
sim = ''
cen_hods = []
sat_hods = []
halo_catalog = astropy.table.Table.read(scratch_path+'catalog_ab_%s_large.hdf5'%('halo_mpeak'), format = 'hdf5')
mass_bins = compute_mass_bins(halo_catalog['halo_mvir'], 0.2)
mass_bin_centers = (mass_bins[1:]+mass_bins[:-1])/2.0
simname = 'ds_' if sim=='' else sim
np.savetxt(simname+'mass_bins.npy', mass_bins)
for fname, path in zip(hdf5_files, paths):
print path
#for ab_property in ('halo_vmax@mpeak', 'halo_mpeak'):
# for shuffle in (False, True):
# if shuffle and ab_property=='halo_vmax@mpeak':
# continue
# if not shuffle:
# galaxy_catalog = astropy.table.Table.read('../%scatalog_ab_%s_fixed.hdf5'%(sim,ab_property), format = 'hdf5',
# path = '%scatalog_ab_%s.hdf5'%(sim,ab_property))
# else:
galaxy_catalog = astropy.table.Table.read(scratch_path + fname, path = path, format = 'hdf5')
cens_occ, sats_occ = compute_occupations(halo_catalog, galaxy_catalog)
host_halo_mass = halo_catalog[ halo_catalog['halo_upid']==-1]['halo_mvir']
host_halo_masses.append(host_halo_mass)
cenmask = galaxy_catalog['halo_upid']==-1
satmask = galaxy_catalog['halo_upid']>0
halo_mass = halo_catalog['halo_mvir']
cen_hod = hod_from_mock(galaxy_catalog['halo_mvir_host_halo'][cenmask], halo_mass, mass_bins)[0]
sat_hod = hod_from_mock(galaxy_catalog['halo_mvir_host_halo'][satmask], halo_mass, mass_bins)[0]
cen_hods.append(cen_hod)
sat_hods.append(sat_hod)
#if not shuffle:
# np.savetxt('%scatalog_ab_%s_cen_hod.npy'%(sim,ab_property), cen_hod)
# np.savetxt('%scatalog_ab_%s_sat_hod.npy'%(sim,ab_property),sat_hod)
#else:
# np.savetxt('%scatalog_ab_%s_shuffled_cen_hod.npy'%(sim,ab_property), cen_hod)
# np.savetxt('%scatalog_ab_%s_shuffled_sat_hod.npy'%(sim,ab_property),sat_hod)
In [ ]:
for cen_hod in cen_hods:
plt.plot(mass_bin_centers, cen_hod)
plt.loglog();
In [ ]:
for sat_hod in sat_hods:
plt.plot(mass_bin_centers, sat_hod)
plt.loglog();
In [ ]:
from itertools import izip
for cen_hod, sat_hod in izip(cen_hods, sat_hods):
plt.plot(mass_bin_centers, cen_hod+sat_hod)
plt.loglog();
In [ ]:
plt.hist(halocats['chinchilla']['halo_mvir'], bins = mass_bins, label = 'Chinchilla')
plt.hist(halocats['aardvark']['halo_mvir'], bins = mass_bins, alpha = 0.3, label = 'Aardvark');
plt.legend(loc = 'best')
plt.yscale('log')
plt.xscale('log')
#plt.loglog()
In [ ]:
rbins = np.logspace(-1, 1.7, 15)
rpoints = (rbins[1:]+rbins[:-1])/2
In [ ]:
nd = 1e-3
n_obj_needed = int(nd*(cat.Lbox**3))
halo_clusterings = {}
for simname, halocat in halocats.iteritems():
sort_idxs = np.argsort(halocat['halo_mvir'])
tmp_halocat = halocat[sort_idxs[-1*n_obj_needed:]]
print np.min(tmp_halocat['halo_mvir'])
pos = np.c_[tmp_halocat['halo_x'], tmp_halocat['halo_y'], tmp_halocat['halo_z']]
halo_clusterings[simname] = tpcf(pos, rbins, period=cat.Lbox)
In [ ]:
for simname in simnames:
plt.plot(rpoints, halo_clusterings[simname], label = simname)
plt.loglog();
plt.legend(loc='best')
plt.xlabel('r [Mpc]')
plt.ylabel('xi Halo, nd = %.2e'%nd)
In [ ]:
pos = np.c_[galcats['chinchilla']['halo_x'], galcats['chinchilla']['halo_y'], galcats['chinchilla']['halo_z']]
chin_xi = tpcf(pos, rbins, period=cat.Lbox)
In [ ]:
pos = np.c_[galcats['aardvark']['halo_x'], galcats['aardvark']['halo_y'], galcats['aardvark']['halo_z']]
aard_xi = tpcf(pos, rbins, period=cat.Lbox)
In [ ]:
plt.plot(rpoints, chin_xi, label = 'Chinchilla')
plt.plot(rpoints, aard_xi, label = 'Aardvark')
plt.loglog();
plt.legend(loc='best')
plt.xlabel('r [Mpc]')
plt.ylabel('xi')
In [ ]:
plt.plot(rpoints, chin_xi/halo_clusterings['chinchilla'], label = 'Chinchilla')
plt.plot(rpoints, aard_xi/halo_clusterings['aardvark'], label = 'Aardvark')
#plt.loglog();
plt.xscale('log')
plt.legend(loc='best')
plt.xlabel('r [Mpc]')
plt.ylabel('xi')
In [ ]:
plt.plot(rpoints, chin_xi/aard_xi, label = 'Chinchilla/Aardvark')
#plt.plot(rpoints, aard_xi, label = 'Aardvark')
#plt.loglog();
plt.xscale('log')
plt.legend(loc='best')
plt.xlabel('r [Mpc]')
plt.ylabel('xi')
In [ ]:
conc_bins = np.linspace(0, 1, 10)#np.linspace(0, 1000, 10)#np.linspace(0, 22, 10)
In [ ]:
#sns.set_palette(sns.diverging_palette(255, 133, l=60, n=22, center="light"))
colors =sns.cubehelix_palette(22,start=.5, rot=-.75)
In [ ]:
i = 0
fig = plt.figure(figsize = ((10,8)))
for simname, host_halo_mass, cens_occ, sats_occ, ls in izip(simnames, host_halo_masses, cens_occs, sats_occs, ['-', '--']):
mass_bin_idxs = compute_prim_haloprop_bins(prim_haloprop_bin_boundaries=mass_bins, prim_haloprop =host_halo_mass)
host_halo_conc =halocats[simname][ halocats[simname]['halo_upid']==-1]['halo_nfw_conc']
conditional_conc_percentiles = compute_conditional_percentiles(prim_haloprop = host_halo_mass,\
sec_haloprop = host_halo_conc,\
prim_haloprop_bin_boundaries = mass_bins)
mass_bin_nos = range(1,21,1)
for bin_no, c in zip(mass_bin_nos, colors):
bin_center = np.mean(mass_bins[bin_no:bin_no+2])
indices_of_mb = np.where(mass_bin_idxs == bin_no)[0]
cens_avg, sats_avg = np.mean(cens_occ[indices_of_mb]), np.mean(sats_occ[indices_of_mb])
#med_conc = np.median([indices_of_mb, 5])
med_conc = 0.5
(binned_cens, c_bins,_), (binned_sats,_,_) = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
cens_occ[indices_of_mb],bins=conc_bins), \
binned_statistic(conditional_conc_percentiles[indices_of_mb],\
sats_occ[indices_of_mb],bins=conc_bins)
cen_bin_counts, _, _ = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
cens_occ[indices_of_mb], bins = conc_bins, statistic='sum')
sat_bin_counts, _, _ = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
sats_occ[indices_of_mb], bins = conc_bins, statistic='sum')
c_bin_centers = (c_bins[1:]+c_bins[:-1])/2
#plt.plot(c_bin_centers,(binned_sats-sats_avg), color = c,lw=2.5, label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center) )
#plt.errorbar(c_bin_centers,(binned_sats-sats_avg), yerr=np.sqrt(binned_sats/sat_bin_counts),color = c,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center))
if i >0:
plt.plot(c_bin_centers,(binned_cens-cens_avg),color = c, ls = ls)
else:
plt.plot(c_bin_centers,(binned_cens-cens_avg),color = c,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center), ls = ls)
i+=1
#plt.xscale('log')
plt.legend(loc='best')
#plt.xlim([0,25])
plt.ylim([-0.1,1.1])
#plt.xlim([-0.05, 1.2])
#plt.yscale('log')
plt.title(r"$N(c)$ distribution in fixed mass bins, Vpeak SHAM")
plt.show()
In [ ]:
i = 0
fig = plt.figure(figsize = ((10,8)))
for simname, host_halo_mass, cens_occ, sats_occ, ls in izip(simnames, host_halo_masses, cens_occs, sats_occs, ['-', '--']):
mass_bin_idxs = compute_prim_haloprop_bins(prim_haloprop_bin_boundaries=mass_bins, prim_haloprop =host_halo_mass)
host_halo_conc =halocats[simname][ halocats[simname]['halo_upid']==-1]['halo_nfw_conc']
conditional_conc_percentiles = compute_conditional_percentiles(prim_haloprop = host_halo_mass,\
sec_haloprop = host_halo_conc,\
prim_haloprop_bin_boundaries = mass_bins)
mass_bin_nos = range(1,21,1)
for bin_no, c in zip(mass_bin_nos, colors):
bin_center = np.mean(mass_bins[bin_no:bin_no+2])
indices_of_mb = np.where(mass_bin_idxs == bin_no)[0]
cens_avg, sats_avg = np.mean(cens_occ[indices_of_mb]), np.mean(sats_occ[indices_of_mb])
#med_conc = np.median([indices_of_mb, 5])
med_conc = 0.5
(binned_cens, c_bins,_), (binned_sats,_,_) = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
cens_occ[indices_of_mb],bins=conc_bins), \
binned_statistic(conditional_conc_percentiles[indices_of_mb],\
sats_occ[indices_of_mb],bins=conc_bins)
cen_bin_counts, _, _ = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
cens_occ[indices_of_mb], bins = conc_bins, statistic='sum')
sat_bin_counts, _, _ = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
sats_occ[indices_of_mb], bins = conc_bins, statistic='sum')
c_bin_centers = (c_bins[1:]+c_bins[:-1])/2
#plt.plot(c_bin_centers,(binned_sats-sats_avg), color = c,lw=2.5, label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center) )
#plt.errorbar(c_bin_centers,(binned_sats-sats_avg), yerr=np.sqrt(binned_sats/sat_bin_counts),color = c,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center))
if i >0:
plt.plot(c_bin_centers,(binned_cens),color = c, ls = ls)
else:
plt.plot(c_bin_centers,(binned_cens),color = c,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center), ls = ls)
i+=1
#plt.xscale('log')
plt.legend(loc='best')
#plt.xlim([0,25])
plt.ylim([-0.1,1.1])
plt.xlim([-0.05, 1.2])
#plt.yscale('log')
plt.title(r"$N(c)$ distribution in fixed mass bins, Vpeak SHAM")
plt.show()
In [ ]:
from collections import defaultdict
mass_bin_nos = range(1,21,1)
binned_censes = defaultdict(list)
cens_avgs = defaultdict(list)
bin_centers = []
for bin_no, c in zip(mass_bin_nos, colors):
for simname, host_halo_mass, cens_occ, sats_occ, ls in izip(simnames, host_halo_masses, cens_occs, sats_occs, ['-', '--']):
mass_bin_idxs = compute_prim_haloprop_bins(prim_haloprop_bin_boundaries=mass_bins, prim_haloprop =host_halo_mass)
host_halo_conc =halocats[simname][ halocats[simname]['halo_upid']==-1]['halo_nfw_conc']
conditional_conc_percentiles = compute_conditional_percentiles(prim_haloprop = host_halo_mass,\
sec_haloprop = host_halo_conc,\
prim_haloprop_bin_boundaries = mass_bins)
bin_center = np.mean(mass_bins[bin_no:bin_no+2])
if simname == 'chinchilla':
bin_centers.append(bin_center)
indices_of_mb = np.where(mass_bin_idxs == bin_no)[0]
cens_avg, sats_avg = np.mean(cens_occ[indices_of_mb]), np.mean(sats_occ[indices_of_mb])
cens_avgs[simname].append(cens_avg)
#med_conc = np.median([indices_of_mb, 5])
med_conc = 0.5
(binned_cens, c_bins,_), (binned_sats,_,_) = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
cens_occ[indices_of_mb],bins=conc_bins), \
binned_statistic(conditional_conc_percentiles[indices_of_mb],\
sats_occ[indices_of_mb],bins=conc_bins)
binned_censes[simname].append(binned_cens)
cen_bin_counts, _, _ = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
cens_occ[indices_of_mb], bins = conc_bins, statistic='sum')
sat_bin_counts, _, _ = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
sats_occ[indices_of_mb], bins = conc_bins, statistic='sum')
c_bin_centers = (c_bins[1:]+c_bins[:-1])/2
#plt.plot(c_bin_centers,(binned_sats-sats_avg), color = c,lw=2.5, label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center) )
#plt.errorbar(c_bin_centers,(binned_sats-sats_avg), yerr=np.sqrt(binned_sats/sat_bin_counts),color = c,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center))
In [ ]:
for binned_cens_chin, binned_cens_aard, cens_avg_chin, cens_avg_aard, bin_center, c in izip(binned_censes['chinchilla'],\
binned_censes['aardvark'],\
cens_avgs['chinchilla'],\
cens_avgs['aardvark'],\
bin_centers,
colors):
print binned_cens_chin
print cens_avg_chin
plt.plot(c_bin_centers,(binned_cens_chin-cens_avg_chin),color = c)#,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center))
plt.plot(c_bin_centers,(binned_cens_aard-cens_avg_aard),color = c, ls = '--')
#plt.xscale('log')
plt.legend(loc='best')
#plt.xlim([0,25])
#plt.ylim([-0.1,1.1])
plt.xlim([-0.05, 1.2])
#plt.yscale('log')
plt.title(r"$N(c|M)$ Chinchilla vs. Aardvark in %.1f $\log M_{\odot}$, Vpeak SHAM, "%np.log10(bin_center))
plt.show()
In [ ]:
for binned_cens_chin, binned_cens_aard, cens_avg_chin, cens_avg_aard, bin_center, c in izip(binned_censes['chinchilla'],\
binned_censes['aardvark'],\
cens_avgs['chinchilla'],\
cens_avgs['aardvark'],\
bin_centers,
colors):
plt.plot(c_bin_centers,(binned_cens_chin-cens_avg_chin)/(binned_cens_aard-cens_avg_aard),color = c)#,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center))
#plt.xscale('log')
plt.legend(loc='best')
#plt.xlim([0,25])
#plt.ylim([-0.1,1.1])
plt.xlim([-0.05, 1.2])
#plt.yscale('log')
plt.title(r"$N(c|M)$ Chinchilla/Aardvark in %.1f $\log M_{\odot}$, Vpeak SHAM, "%np.log10(bin_center))
plt.show()
In [ ]: